//SUMMARY:  This merges in regional data with US survey data for Ballard-Rosa, Jensen, & Scheve (2021)

// Start from an empty session and turn off output paging
clear all
set more off

// Working directory that holds the replication data files
cd "C:\Users\cambr\Dropbox\US Authoritarianism\Paper\ISQ\ISQ replication materials\Data\"

*** Prepare contextual data: county-level population shares
clear all
insheet using ethnicity_county.csv, names

// Each share is the group's estimate column divided by the county total
// (a zero or missing total yields a missing share)
generate share_black = estimatetotalblackorafricanameri / estimatetotal
generate share_white = estimatetotalwhitealone / estimatetotal

// Give the county identifier the name used as the merge key later on
rename county countyid2010

// Store only the merge key and the two shares
keep countyid2010 share_black share_white
save ethnicity, replace

*** Load data
* Survey data (master dataset for every merge below)
use us_survey, clear

// Declare the survey design: caseid as the sampling unit, weight as the probability weight
svyset caseid [pweight=weight]

* Trade shock data
merge m:1 countyid2010 using cty2010_czone_shock
// Author's note from the original run: 13 observations from master not matched
tab state_str if _merge==1
// Unmatched counties are in Alaska and Hawaii (2 states) plus DC:
// we don't have trade shocks for these
drop state_abb state_id state_name county_id county_name cty_fips
// Retain only survey rows that found a trade-shock record
drop if _merge!=3
drop _merge

* Contextual data
// Foreign-born data: matched rows only
merge m:1 countyid2010 using foreignborn, keep(match) nogenerate

// Rural-area data: keep every survey row, discard using-only counties
merge m:1 countyid2010 using countyruralareas, keep(master match) nogenerate

// Ethnicity shares: keep every survey row, discard using-only counties
merge m:1 countyid2010 using ethnicity, keep(master match) nogenerate

* County-level income data by race
merge m:1 countyid2010 using "Avg racial income by county (2016).dta", keep(master match) nogenerate



* Variables
// Builds the analysis variables (trade-shock measures, context indicators and
// their interactions), labels them, and saves the result as analysis_data.
// Stata orders missing values above every number, so an unguarded "x >= c"
// evaluates to 1 when x is missing; threshold indicators below are therefore
// restricted to non-missing inputs.
rename above_hs_educ college

* Trade shock measures (levels, logs, and the instrument)
g shock_v1=d_tradeusch_pw
lab var shock_v1 "Change in import exposure per worker, 1991-2007"
// ln() returns missing for zero or negative inputs
g ln_shock_v1=ln(shock_v1)
lab var ln_shock_v1 "Log of Change in import exposure per worker, 1991-2007"
g shock_v2=d_czone_imp_exp_usch_1991_2007
lab var shock_v2 "Change in CZ's average import exposure, 1991-2007"
g ln_shock_v2=ln(shock_v2)
lab var ln_shock_v2 "Log of Change in CZ's average import exposure, 1991-2007"
g initial_manu=l_shind_manuf_cbp
g shock_v2_inst=d_czone_imp_exp_otch_1991_2007
lab var shock_v2_inst "Instrument for Change in CZ's average import exposure, 1991-2007"

gen ln_shockIV = ln(shock_v2_inst)
// High-shock indicator; left missing when shock_v2 is missing instead of
// being coded 1.
// NOTE(review): the 1.77 cutoff is hard-coded; its derivation is not shown here.
gen highShock = (shock_v2 >= 1.77) if !missing(shock_v2)

* Contextual variables
lab var fb_pct2015 "Percent foreign-born 2015"
lab var fb_chg_20002015 "Percent-point difference foreign-born 2000-2015"
lab var initial_manu "Labor share in manufacturing"
lab var rural_population_pct2010 "Share of pop. living in rural areas"
// Survey rows without a rural-area match have a missing percentage; keep the
// indicator missing for them rather than classifying them as rural.
g large_rural_pop=(rural_population_pct2010>=20) if !missing(rural_population_pct2010)
lab var large_rural_pop "More than 20 pct. of the pop. lives in rural areas"

gen log_totalPop2015 = log(total2015)

g urban_population=100-rural_population_pct2010
lab var urban_population "Share of pop. living in urban areas"

// Diverse = white share below 0.7; missing when the share is missing
g diverse=(share_white<0.7) if !missing(share_white)
lab var diverse "Diverse population"

* Interactions of diversity with the shock measures
gen diverse_lnShock = diverse * ln_shock_v2
gen diverse_lnShockIV = diverse * ln_shockIV
gen diverse_highShock = diverse * highShock

lab var share_black "Black population share"
lab var share_white "White population share"

g diverse_shock_v2=diverse*shock_v2
lab var diverse_shock_v2 "Diverse population*Change in CZ's average import exposure, 1991-2007"

g diverse_shock_v2_inst=diverse*shock_v2_inst
lab var diverse_shock_v2_inst "Diverse population*Instrument for change in CZ's average import exposure, 1991-2007"

// 1 if the respondent did not move, or moved but not outside the area.
// NOTE(review): a missing moving/moves_outside value yields 0 here, not
// missing - confirm this matches the survey coding.
g same_area=(moving==0 | (moving==1 & moves_outside==0))

// NOTE(review): a missing white value yields non_white==0 - confirm intended.
g non_white=(white==0)

* Commuting-zone level diversity, computed over the observations in each czone
bysort czone: egen diverse_share=mean(diverse)
bysort czone: egen diverse_cz=max(diverse)

lab var diverse_cz "Indicator for at least one diverse county in the CZ"
lab def diverse 0 "Not diverse" 1 "Diverse" 
lab val diverse_cz diverse

g diverse_cz_shock_v2=diverse_cz*shock_v2
lab var diverse_cz_shock_v2 "Diverse CZ*Change in CZ's average import exposure, 1991-2007"

g diverse_cz_shock_v2_inst=diverse_cz*shock_v2_inst
lab var diverse_cz_shock_v2_inst "Diverse CZ*Instrument for change in CZ's average import exposure, 1991-2007"

* Interactions with the same-area indicator
g same_area_shock_v2=same_area*shock_v2
lab var same_area_shock_v2 "Same area*Change in CZ's average import exposure, 1991-2007"

g same_area_shock_v2_inst=same_area*shock_v2_inst
lab var same_area_shock_v2_inst "Same area*Instrument for change in CZ's average import exposure, 1991-2007"

g same_area_diverse=same_area*diverse
lab var same_area_diverse "Same area*Diverse population"

g same_area_diverse_shock_v2=same_area*diverse*shock_v2
lab var same_area_diverse_shock_v2 "Same area*Diverse population*Trade shock"
g same_area_diverse_shock_v2_inst=same_area*diverse*shock_v2_inst
lab var same_area_diverse_shock_v2_inst "Same area*Diverse population*Instrument for trade shock"

// Write the merged, labelled dataset used by the analysis script
save analysis_data, replace

disp "RUN NEXT:  ISQ replication.do"
